# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck -check-prefix=MUBUF-V2A %s
# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -mattr=+enable-flat-scratch -run-pass=prologepilog -o - %s | FileCheck -check-prefix=FLATSCR-V2A %s

--- |

  define void @test_spill_v2_partial_agpr() #1 {
  entry:
    unreachable
  }

  define void @test_spill_v3_partial_agpr() #1 {
  entry:
    unreachable
  }

  define void @test_spill_v4_partial_agpr() #2 {
  entry:
    unreachable
  }

  define void @test_spill_v5_partial_agpr() #2 {
  entry:
    unreachable
  }

  define void @test_spill_v6_partial_agpr() #4 {
  entry:
    unreachable
  }

  define void @test_spill_v8_partial_agpr() #3 {
  entry:
    unreachable
  }

  define void @test_spill_v16_partial_agpr() #4 {
  entry:
    unreachable
  }

  attributes #1 = { nounwind "amdgpu-num-vgpr"="1" }
  attributes #2 = { nounwind "amdgpu-num-vgpr"="3" }
  attributes #3 = { nounwind "amdgpu-num-vgpr"="4" }
  attributes #4 = { nounwind "amdgpu-num-vgpr"="5" }
...

---
name:            test_spill_v2_partial_agpr
tracksRegLiveness: true
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 }
machineFunctionInfo:
  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
  stackPtrOffsetReg: '$sgpr32'
  hasSpilledVGPRs: true
body:             |
  bb.0.entry:
    ; MUBUF-V2A-LABEL: name: test_spill_v2_partial_agpr
    ; MUBUF-V2A: liveins: $agpr0
    ; MUBUF-V2A-NEXT: {{  $}}
    ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1
    ; MUBUF-V2A-NEXT: S_ENDPGM 0
    ; FLATSCR-V2A-LABEL: name: test_spill_v2_partial_agpr
    ; FLATSCR-V2A: liveins: $agpr0
    ; FLATSCR-V2A-NEXT: {{  $}}
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF
    ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1
    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
    ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1
    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
    ; FLATSCR-V2A-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1 = IMPLICIT_DEF
    SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
    $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
    S_ENDPGM 0

...

---
name:            test_spill_v3_partial_agpr
tracksRegLiveness: true
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 12, alignment: 4 }
machineFunctionInfo:
  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
  stackPtrOffsetReg: '$sgpr32'
  hasSpilledVGPRs: true
body:             |
  bb.0.entry:
    ; MUBUF-V2A-LABEL: name: test_spill_v3_partial_agpr
    ; MUBUF-V2A: liveins: $agpr0
    ; MUBUF-V2A-NEXT: {{  $}}
    ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
    ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2
    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2
    ; MUBUF-V2A-NEXT: S_ENDPGM 0
    ; FLATSCR-V2A-LABEL: name: test_spill_v3_partial_agpr
    ; FLATSCR-V2A: liveins: $agpr0
    ; FLATSCR-V2A-NEXT: {{  $}}
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
    ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2
    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s64) into %stack.0, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s64) from %stack.0, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
    SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5)
    $vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s96) from %stack.0, align 4, addrspace 5)
    S_ENDPGM 0

...

---
name:            test_spill_v4_partial_agpr
tracksRegLiveness: true
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 }
machineFunctionInfo:
  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
  stackPtrOffsetReg: '$sgpr32'
  hasSpilledVGPRs: true
body:             |
  bb.0.entry:
    ; MUBUF-V2A-LABEL: name: test_spill_v4_partial_agpr
    ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2
    ; MUBUF-V2A-NEXT: {{  $}}
    ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
    ; MUBUF-V2A-NEXT: S_ENDPGM 0
    ; FLATSCR-V2A-LABEL: name: test_spill_v4_partial_agpr
    ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2
    ; FLATSCR-V2A-NEXT: {{  $}}
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
    ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
    ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
    ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
    ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
    ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
    ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
    ; FLATSCR-V2A-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
    SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
    $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
    S_ENDPGM 0

...

---
name:            test_spill_v5_partial_agpr
tracksRegLiveness: true
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 20, alignment: 4 }
machineFunctionInfo:
  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
  stackPtrOffsetReg: '$sgpr32'
  hasSpilledVGPRs: true
body:             |
  bb.0.entry:
    ; MUBUF-V2A-LABEL: name: test_spill_v5_partial_agpr
    ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2
    ; MUBUF-V2A-NEXT: {{  $}}
    ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
    ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
    ; MUBUF-V2A-NEXT: S_ENDPGM 0
    ; FLATSCR-V2A-LABEL: name: test_spill_v5_partial_agpr
    ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2
    ; FLATSCR-V2A-NEXT: {{  $}}
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
    ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
    ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s64) into %stack.0, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
    ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
    ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s64) from %stack.0, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
    ; FLATSCR-V2A-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
    SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.0, align 4, addrspace 5)
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.0, align 4, addrspace 5)
    S_ENDPGM 0

...

---
name:            test_spill_v6_partial_agpr
tracksRegLiveness: true
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 24, alignment: 4 }
machineFunctionInfo:
  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
  stackPtrOffsetReg: '$sgpr32'
  hasSpilledVGPRs: true
body:             |
  bb.0.entry:
    ; MUBUF-V2A-LABEL: name: test_spill_v6_partial_agpr
    ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
    ; MUBUF-V2A-NEXT: {{  $}}
    ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; MUBUF-V2A-NEXT: S_ENDPGM 0
    ; FLATSCR-V2A-LABEL: name: test_spill_v6_partial_agpr
    ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
    ; FLATSCR-V2A-NEXT: {{  $}}
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
    ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5)
    ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
    ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
    ; FLATSCR-V2A-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
    SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s196) into %stack.0, align 4, addrspace 5)
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s196) from %stack.0, align 4, addrspace 5)
    S_ENDPGM 0

...

---
name:            test_spill_v8_partial_agpr
tracksRegLiveness: true
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 4 }
machineFunctionInfo:
  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
  stackPtrOffsetReg: '$sgpr32'
  hasSpilledVGPRs: true
body:             |
  bb.0.entry:
    ; MUBUF-V2A-LABEL: name: test_spill_v8_partial_agpr
    ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
    ; MUBUF-V2A-NEXT: {{  $}}
    ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
    ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; MUBUF-V2A-NEXT: S_ENDPGM 0
    ; FLATSCR-V2A-LABEL: name: test_spill_v8_partial_agpr
    ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
    ; FLATSCR-V2A-NEXT: {{  $}}
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
    ; FLATSCR-V2A-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
    SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.0, align 4, addrspace 5)
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.0, align 4, addrspace 5)
    S_ENDPGM 0

...

---
name:            test_spill_v16_partial_agpr
tracksRegLiveness: true
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 }
machineFunctionInfo:
  scratchRSrcReg:  $sgpr0_sgpr1_sgpr2_sgpr3
  stackPtrOffsetReg: '$sgpr32'
  hasSpilledVGPRs: true
body:             |
  bb.0.entry:
    ; MUBUF-V2A-LABEL: name: test_spill_v16_partial_agpr
    ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
    ; MUBUF-V2A-NEXT: {{  $}}
    ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
    ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec
    ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
    ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    ; MUBUF-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; MUBUF-V2A-NEXT: S_ENDPGM 0
    ; FLATSCR-V2A-LABEL: name: test_spill_v16_partial_agpr
    ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
    ; FLATSCR-V2A-NEXT: {{  $}}
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec
    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr8_vgpr9_vgpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0 + 32, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
    ; FLATSCR-V2A-NEXT: $vgpr8_vgpr9_vgpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0 + 32, align 4, addrspace 5)
    ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; FLATSCR-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; FLATSCR-V2A-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
    SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
    S_ENDPGM 0

...
